{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "import pandas as pd\n",
    "import time\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "verbosity     = 1\n",
    "which_dataset = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Image root folder for each known dataset id.\n",
    "dataset_dirs = {\n",
    "    0: \"/run/media/hoosiki/WareHouse2/home/mtb/datasets/vggface2/train_mtcnnpy_182\",\n",
    "    1: \"/run/media/hoosiki/WareHouse2/home/mtb/datasets/vggface2/test_mtcnnpy_182\",\n",
    "    2: \"/run/media/hoosiki/WareHouse2/home/mtb/datasets/lfw/lfw_mtcnnpy_182\",\n",
    "}\n",
    "\n",
    "# Any id that is not in the table falls back to the my_pictures folder.\n",
    "root_dir = dataset_dirs.get(\n",
    "    which_dataset,\n",
    "    \"/run/media/hoosiki/WareHouse2/home/mtb/datasets/my_pictures/my_pictures_mtcnnpy_182\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Match every entry exactly two levels below root_dir: root_dir/<folder>/<entry>.\n",
    "files = glob.glob(\"{}/*/*\".format(root_dir))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Start of the timed section; read again in the cell that writes the CSV.\n",
    "time0 = time.time()\n",
    "n_files = len(files)\n",
    "print(\"The number of files: \", n_files)\n",
    "\n",
    "# Collect one plain dict per file and build the DataFrame once at the end.\n",
    "# Appending to a DataFrame row by row copies the whole frame on every\n",
    "# iteration (quadratic time), and DataFrame.append was removed in pandas 2.0.\n",
    "records = []\n",
    "for idx, file in enumerate(files):\n",
    "    # Progress marker every 10000 files.\n",
    "    if idx % 10000 == 0:\n",
    "        print(\"[{}/{}]\".format(idx, n_files - 1))\n",
    "    records.append({\n",
    "        # File name up to its first '.', e.g. 'a.b.jpg' -> 'a'.\n",
    "        'id': os.path.basename(file).split('.')[0],\n",
    "        # Name of the folder that directly contains the file.\n",
    "        'name': os.path.basename(os.path.dirname(file)),\n",
    "    })\n",
    "\n",
    "# Explicit columns keep 'id' and 'name' present even when no file matched,\n",
    "# so the later sort on those columns does not fail on an empty frame.\n",
    "df = pd.DataFrame(records, columns=['id', 'name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Order rows by 'name', then by 'id', and renumber the index from 0.\n",
    "df = (\n",
    "    df\n",
    "    .sort_values(by=['name', 'id'])\n",
    "    .reset_index(drop=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One integer code per distinct 'name' value; element [0] of the\n",
    "# factorize result is the array of codes.\n",
    "df['class'] = df['name'].factorize()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# CSV file written for each dataset id; any other id uses my_pictures.csv.\n",
    "csv_names = {\n",
    "    0: \"train_vggface2.csv\",\n",
    "    1: \"test_vggface2.csv\",\n",
    "    2: \"valid_lfw.csv\",\n",
    "}\n",
    "df.to_csv(csv_names.get(which_dataset, \"my_pictures.csv\"), index=False)\n",
    "\n",
    "# time0 is set in the cell that scans the files, so this spans scan through export.\n",
    "elapsed_time = time.time() - time0\n",
    "# Whole hours and whole minutes, so the report reads '0 h 3 m' instead of '0.0 h 3.0 m'.\n",
    "hours, remainder = divmod(int(elapsed_time), 3600)\n",
    "print(\"elapsed time: \", hours, \"h\", remainder // 60, \"m\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Bare last expression: the notebook renders the finished table as this cell's output.\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
